library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(modelr)
library(purrr)
library(tidyr)

Import the Data

# Location of the Excel workbook, relative to the working directory.
file_path <- "./PL_1819_data.xlsx"

# Load the workbook's fourth sheet (selected by position) into a data frame.
player_df <- read_excel(path = file_path, sheet = 4)
## New names:
## • `Gls` -> `Gls...12`
## • `Ast` -> `Ast...13`
## • `G+A` -> `G+A...14`
## • `G-PK` -> `G-PK...15`
## • `xG` -> `xG...20`
## • `npxG` -> `npxG...21`
## • `xAG` -> `xAG...22`
## • `npxG+xAG` -> `npxG+xAG...23`
## • `Gls` -> `Gls...27`
## • `Ast` -> `Ast...28`
## • `G+A` -> `G+A...29`
## • `G-PK` -> `G-PK...30`
## • `xG` -> `xG...32`
## • `xAG` -> `xAG...33`
## • `npxG` -> `npxG...35`
## • `npxG+xAG` -> `npxG+xAG...36`

Data Wrangling

# Coerce Age and Min to numeric (entries that cannot be parsed become NA),
# then drop every row that has an NA in any column.
cleaned_player_df <- player_df |>
  mutate(across(c(Age, Min), \(col) as.numeric(as.character(col)))) |>
  na.omit()
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `Age = as.numeric(as.character(Age))`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

Scatterplot: A First Look at the Relationship between Age and Minutes Played (Min)

# Interactive scatterplot of Age against Min; the hover label for each marker
# shows only the player's name.
fig <- plot_ly(
  data = cleaned_player_df,
  x = ~Age,
  y = ~Min,
  type = "scatter",
  mode = "markers",
  hoverinfo = "text",
  text = ~paste("Player:", Player)
) |>
  layout(
    title = "Interactive Scatterplot of Age vs Minutes Played",
    xaxis = list(title = "Age"),
    yaxis = list(title = "Minutes Played")
  )

# Display the figure.
fig